Radu Ioan-Alexandru (Gr 405) Stefan Eva (Gr 405)
09 ianuarie 2019
# See the structures of datas
str(gold)## Time-Series [1:1108] from 1 to 1108: 306 300 303 297 304 ...
str(woolyrnq)## Time-Series [1:119] from 1965 to 1994: 6172 6709 6633 6660 6786 ...
str(gas)## Time-Series [1:476] from 1956 to 1996: 1709 1646 1794 1878 2173 ...
# a. Use autoplot to plot each of these in separate plots.
autoplot(gold)autoplot(woolyrnq)autoplot(gas)writeLines("")# b. What is the frequency of each commodity series? Hint: apply the frequency() function.
print("Frequency")## [1] "Frequency"
print("gold")## [1] "gold"
frequency(gold)## [1] 1
print("woolyrnq")## [1] "woolyrnq"
frequency(woolyrnq)## [1] 4
print("gas")## [1] "gas"
frequency(gas)## [1] 12
writeLines("")# c. Use which.max() to spot the outlier in the gold series. Which observation was it?
print("When gold got maximum value?")## [1] "When gold got maximum value?"
which.max(gold)## [1] 770
print("What was the gold's maximum value?")## [1] "What was the gold's maximum value?"
gold[which.max(gold)]## [1] 593.7
# a. You can read the data into R with the following script:
tute1 <- read.csv("tute1.csv", header=TRUE)
View(tute1)# b. Convert the data to time series
mytimeseries <- ts(tute1[,-1], start=1981, frequency=4)
# (The [,-1] removes the first column which contains the quarters as we don't need them now.)# c. Construct time series plots of each of the three series
autoplot(mytimeseries, facets=TRUE)# Check what happens when you don't include facets=TRUE.
autoplot(mytimeseries)# a. You can read the data into R with the following script:
retaildata <- readxl::read_excel("retail.xlsx", skip=1)
View(retaildata)
# The second argument (skip=1) is required because the Excel sheet has two header rows.# b. Select one of the time series as follows (but replace the column name with your own chosen column):
View(retaildata)
myts <- ts(retaildata[,"A3349873A"], frequency=12, start=c(1982,4))
View(retaildata)# c. Explore your chosen retail time series using the following functions: autoplot, ggseasonplot, ggsubseriesplot, gglagplot, ggAcf
#Can you spot any seasonality, cyclicity and trend? What do you learn about the series?
autoplot(myts)ggseasonplot(myts)ggsubseriesplot(myts)gglagplot(myts, lags = 12)ggAcf(myts)# Seasonality and trend of the data can be seen# - Use help() to find out about the data in each series.
help(bicoal)## starting httpd help server ... done
help(chicken)
help(dole)
help(usdeaths)
help(lynx)
help(goog)
help(writing)
help(fancy)
help(a10)
help(h02)autoplot(bicoal)autoplot(chicken)autoplot(dole)autoplot(usdeaths)autoplot(lynx)autoplot(goog)autoplot(writing)autoplot(fancy)autoplot(a10)autoplot(h02)# - For the goog plot, modify the axis labels and title.
autoplot(goog) +
ggtitle("Daily closing stock prices of Google Inc.") +
xlab("Time") +
ylab("Price(Unit: US$)")# - What can you say about the seasonal patterns?
# - Can you identify any unusual years?
ggseasonplot(writing)ggsubseriesplot(writing)# The sales amount of paper falls down in August annually
ggseasonplot(fancy)ggsubseriesplot(fancy)# In December, 1992 the monthly sales for a souvenir shop increased dramatically compared to the same month of the last year
ggseasonplot(a10)ggsubseriesplot(a10)# The amount of antidiabetes monthly scripts falls down in February annually
ggseasonplot(h02)ggsubseriesplot(h02)# The amount of corticosteroid monthly scripts also falls down in February annually# - Can you spot any seasonality, cyclicity and trend?
# - What do you learn about the series?
autoplot(hsales)ggseasonplot(hsales)ggsubseriesplot(hsales)gglagplot(hsales)ggAcf(hsales, lag.max = 400)# can spot seasonality and cyclicity. The cycle period is about 4 years(100 months)autoplot(usdeaths)ggseasonplot(usdeaths)ggsubseriesplot(usdeaths)gglagplot(usdeaths)ggAcf(usdeaths, lag.max = 60)# can spot seasonalityautoplot(bricksq)ggseasonplot(bricksq)ggsubseriesplot(bricksq)gglagplot(bricksq)ggAcf(bricksq, lag.max = 200)# can spot little seasonality and strong trendautoplot(sunspotarea)# ggseasonplot(sunspotarea)
# not seasonal, can't draw it
# ggsubseriesplot(sunspotarea)
# not seasonal, useless to draw it
gglagplot(sunspotarea)ggAcf(sunspotarea, lag.max = 50)# can spot strong cyclicityautoplot(gasoline)ggseasonplot(gasoline)# ggsubseriesplot(gasoline)
# The number of weeks is 52 and it looked like it is too much for subseriesplot
gglagplot(gasoline)ggAcf(gasoline, lag.max = 1000)# can spot seasonality and trend# See the structure of arrivals
str(arrivals)## Time-Series [1:127, 1:4] from 1981 to 2012: 14.76 9.32 10.17 19.51 17.12 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:4] "Japan" "NZ" "UK" "US"
# - Use autoplot, ggseasonplot and ggsubseriesplot to compare the differences between the arrivals from these four countries.
# - Can you identify any unusual observations?autoplot(arrivals)ggseasonplot(arrivals[, "Japan"])ggseasonplot(arrivals[, "NZ"])ggseasonplot(arrivals[, "UK"])ggseasonplot(arrivals[, "US"])ggsubseriesplot(arrivals[, "Japan"])ggsubseriesplot(arrivals[, "NZ"])ggsubseriesplot(arrivals[, "UK"])ggsubseriesplot(arrivals[, "US"])# The arrivals from Japan decrease in the 2nd quarter.
# The arrivals from New Zealand are highest in the 3rd quarter and lowest in the 1st quarter.
# The arrivals from UK are fewer in the 2nd quarter and increasing in the third quarter.
# The arrivals from US are fewer in the 2nd quarter and like the ones from UK, they are increasing in the 3rd quarter.mypigs <- window(pigs, start=1990)
#str(mypigs)
autoplot(mypigs)ggAcf(mypigs)# We can see that the autocorrelation values were outside of bounds, so there isn't probably white noise present.ddj <- diff(dj)
#str(ddj)
autoplot(ddj)ggAcf(ddj)# Only a small part of the autocorrelation values were outside bounds, so there probably can be white noise.# - usnetelec
lUsnetelec <- BoxCox.lambda(usnetelec)
print(c("Good value of lambda for usnetelec: ",
lUsnetelec))## [1] "Good value of lambda for usnetelec: "
## [2] "0.516771443964645"
autoplot(BoxCox(usnetelec, lUsnetelec))# - usgdp
lUsgdp <- BoxCox.lambda(usgdp)
print(c("Good value of lambda for usgdp: ",
lUsgdp))## [1] "Good value of lambda for usgdp: " "0.366352049520934"
autoplot(BoxCox(usgdp, lUsgdp))# - mcopper
lMcopper <- BoxCox.lambda(mcopper)
print(c("Good value of lambda for mcopper: ",
lMcopper))## [1] "Good value of lambda for mcopper: "
## [2] "0.191904709003829"
autoplot(BoxCox(mcopper, lMcopper))# - enplanements
lEnplanements <- BoxCox.lambda(enplanements)
print(c("Good value of lambda for enplanements: ",
lEnplanements))## [1] "Good value of lambda for enplanements: "
## [2] "-0.226946111237065"
autoplot(BoxCox(enplanements, lEnplanements))autoplot(cangas)lCangas <- BoxCox.lambda(cangas)
autoplot(BoxCox(cangas, lCangas))# You can see that the Box-Cox transformation doesn't yield simpler modelretaildata <- xlsx::read.xlsx("retail.xlsx", sheetIndex = 1, startRow = 2)
myts <- ts(retaildata[,"A3349873A"], frequency=12, start=c(1982,4))
lRetail <- BoxCox.lambda(myts)
print(c("selected lambda:", lRetail))## [1] "selected lambda:" "0.127636859661548"
fc_retail <- rwf(myts,
drift = TRUE,
lambda = lRetail,
h = 50,
level = 80)
fc_retail_biasadj <- rwf(myts,
drift = TRUE,
lambda = lRetail,
h = 50,
level = 80,
biasadj = TRUE)
autoplot(myts) +
autolayer(fc_retail, series = "Drift method with Box-Cox Transformation") +
autolayer(fc_retail_biasadj$mean, series = "Bias Adjusted") +
guides(colour = guide_legend(title = "Forecast"))# It is better to choose a bias adjusted Box-Cox Transformation with lambda = 0.128autoplot(dole)lDole <- BoxCox.lambda(dole)
autoplot(BoxCox(dole, lDole))# For dole data, it looks like using Box-Cox Transformation is better to see the pattern.autoplot(usdeaths)lUsdeaths <- BoxCox.lambda(usdeaths)
autoplot(BoxCox(usdeaths, lUsdeaths))# For usdeaths data, it looks like it is meaningless to transform it.autoplot(bricksq)lBricksq <- BoxCox.lambda(bricksq)
autoplot(BoxCox(bricksq, lBricksq))# For bricksq data, it looks like it is meaningless to transform itbeer <- window(ausbeer, start = 1992)
fc <- snaive(beer)
autoplot(fc)res <- residuals(fc)
autoplot(res)checkresiduals(fc)##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 32.269, df = 8, p-value = 8.336e-05
##
## Model df: 0. Total lags used: 8
# The ACF plot result indicates that the residuals aren't white noise because of significant spike at lag 4.
# Ljung-Box test shows that it is statistically significant.
# Therefore they aren't white noise and they aren't normally distributed.snaiveWww <- snaive(WWWusage, h = 15)
autoplot(snaiveWww)checkresiduals(snaiveWww)##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 145.58, df = 10, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 10
naiveWww <- naive(WWWusage)
autoplot(naiveWww)checkresiduals(naiveWww)##
## Ljung-Box test
##
## data: Residuals from Naive method
## Q* = 145.58, df = 10, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 10
# The ACF plot indicates that the residuals aren't white noise because of existence of significant spikes.
# Ljung-Box test shows that they are statistically significant for both of methods.
# Therefore they aren't white noise. And the distribution of residuals isn't normal, too.
# We will choose the naive method because there isn't any particular seasonal pattern in the data and the Q values of Ljung-Box test were same for both methods.snaiveBricksq <- snaive(bricksq)
autoplot(snaiveBricksq)checkresiduals(snaiveBricksq)##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 233.2, df = 8, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 8
naiveBricksq <- naive(bricksq)
autoplot(naiveBricksq)checkresiduals(naiveBricksq)##
## Ljung-Box test
##
## data: Residuals from Naive method
## Q* = 244.43, df = 8, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 8
# For both of methods, Ljung-Box test results show that the residuals aren't white noise. And the residuals didn't have normal distribution, too.
# We will choose snaive method because we can see the seasonality in the data and the Q value of Ljung-Box of snaive methods was less than the value of naive method.answer
# a. Split the data into two parts using
myts.train <- window(myts, end=c(2010,12))
myts.test <- window(myts, start=2011)# b. Check that your data have been split appropriately by producing the following plot.
autoplot(myts) +
autolayer(myts.train, series="Training") +
autolayer(myts.test, series="Test")# c. Calculate forecasts using snaive applied to myts.train.
snaive_myts_train <- snaive(myts.train)# d. Compare the accuracy of your forecasts against the actual values stored in myts.test.
accuracy(snaive_myts_train, myts.test)## ME RMSE MAE MPE MAPE MASE
## Training set 7.772973 20.24576 15.95676 4.702754 8.109777 1.000000
## Test set 55.300000 71.44309 55.78333 14.900996 15.082019 3.495907
## ACF1 Theil's U
## Training set 0.7385090 NA
## Test set 0.5315239 1.297866
# e. Check the residuals. Do the residuals appear to be uncorrelated and normally distributed?
checkresiduals(snaive_myts_train)##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 624.45, df = 24, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 24
# residuals were correlated with each other and not normally distributed# f. How sensitive are the accuracy measures to the training/test split?
# I thought of sensitivity as the ratio of the test set error to the train set error. When I used the definition, it looked like Mean Error is highly sensitive, RMSE, MAE, MPE, MASE are sensitive and MAPE and ACF1 aren't much sensitive.
# I don't know whether this method is what the author wanted to solve this question about sensitivity.# a. Use window() to create three training sets for visnights[,"QLDMetro"], omitting the last 1, 2 and 3 years; call these train1, train2, and train3, respectively. For example train1 <- window(visnights[, "QLDMetro"], end = c(2015, 4))
temp = visnights[, "QLDMetro"]
train1 <- window(temp, end = c(2015, 4))
train2 <- window(temp, end = c(2014, 4))
train3 <- window(temp, end = c(2013, 4))# b. Compute one year of forecasts for each training set using the snaive() method. Call these fc1, fc2 and fc3, respectively
fc1 <- snaive(train1, h = 4)
fc2 <- snaive(train2, h = 4)
fc3 <- snaive(train3, h = 4)# c. Use accuracy() to compare the MAPE over the three test sets. Comment on these.
test1 <- window(temp, start = c(2016, 1), end = c(2016, 4))
test2 <- window(temp, start = c(2015, 1), end = c(2015, 4))
test3 <- window(temp, start = c(2014, 1), end = c(2014, 4))
accuracy(fc1, test1)## ME RMSE MAE MPE MAPE MASE
## Training set 0.02006107 1.0462821 0.8475553 -0.2237701 7.976760 1.0000000
## Test set 0.56983879 0.9358727 0.7094002 4.6191866 6.159821 0.8369957
## ACF1 Theil's U
## Training set 0.06014484 NA
## Test set 0.09003153 0.4842061
writeLines("")accuracy(fc2, test2)## ME RMSE MAE MPE MAPE MASE
## Training set 0.01618760 1.0735582 0.8809432 -0.2744747 8.284216 1.0000000
## Test set 0.08203656 0.4117902 0.3133488 0.5875037 3.057463 0.3556969
## ACF1 Theil's U
## Training set 0.06610879 NA
## Test set -0.59903247 0.3336559
writeLines("")accuracy(fc3, test3)## ME RMSE MAE MPE MAPE MASE
## Training set -0.007455407 1.074544 0.8821694 -0.5625865 8.271365 1.0000000
## Test set 0.370832661 1.058658 0.8625501 4.0472032 8.476977 0.9777602
## ACF1 Theil's U
## Training set 0.07317746 NA
## Test set -0.36890062 1.177346
# MAPE was smallest for 2015 prediction and biggest for 2014 prediction. MAPE became smaller in 2016 prediction, but it wan't smaller than the one for 2015.# a. Produce a time plot of the series.
autoplot(dowjones)# b. Produce forecasts using the drift method and plot them.
driftDj <- rwf(dowjones, drift = TRUE)
autoplot(driftDj)# c. Show that the forecasts are identical to extending the line drawn between the first and last observations.
dj_x <- c(1, 78)
dj_y <- c(dowjones[1], dowjones[78])
lm_dj <- lm(dj_y ~ dj_x)
autoplot(driftDj) +
geom_abline(intercept = lm_dj$coefficients[1],
slope = lm_dj$coefficients[2],
colour = "red")autoplot(driftDj) +
geom_line(aes(x = c(1, 78),
y = dowjones[c(1, 78)]),
colour = "red")# d. Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?
checkresiduals(driftDj)##
## Ljung-Box test
##
## data: Residuals from Random walk with drift
## Q* = 35.039, df = 9, p-value = 5.865e-05
##
## Model df: 1. Total lags used: 10
meanDj <- meanf(dowjones)
autoplot(meanDj)naiveDj <- naive(dowjones)
autoplot(naiveDj)checkresiduals(naiveDj)##
## Ljung-Box test
##
## data: Residuals from Naive method
## Q* = 35.039, df = 10, p-value = 0.000123
##
## Model df: 0. Total lags used: 10
snaiveDj <- snaive(dowjones, h = 10)
autoplot(snaiveDj)# I think that naive method is best because it is really difficult to predict stock price with past observations. I think that it is safer to just take the value of last observation using naive method# a. Produce some plots of the data in order to become familiar with it.
autoplot(ibmclose)# b. Split the data into a training set of 300 observations and a test set of 69 observations.
ibm_train <- subset(ibmclose, end = 300)
ibm_test <- subset(ibmclose, start = 301)# c. Try using various benchmark methods to forecast the training set and compare the results on the test set. Which method did best?
snaive_ibm <- snaive(ibm_train, h = 69)
naive_ibm <- naive(ibm_train, h = 69)
drift_ibm <- rwf(ibm_train, drift = TRUE, h = 69)
mean_ibm <- meanf(ibm_train, h = 69)
autoplot(snaive_ibm) +
autolayer(ibm_test)autoplot(naive_ibm) +
autolayer(ibm_test)autoplot(drift_ibm) +
autolayer(ibm_test)autoplot(mean_ibm) +
autolayer(ibm_test)writeLines("Snaive method")## Snaive method
accuracy(snaive_ibm, ibm_test)## ME RMSE MAE MPE MAPE MASE
## Training set -0.2809365 7.302815 5.09699 -0.08262872 1.115844 1.000000
## Test set -3.7246377 20.248099 17.02899 -1.29391743 4.668186 3.340989
## ACF1 Theil's U
## Training set 0.1351052 NA
## Test set 0.9314689 2.973486
writeLines("\nNaive method")##
## Naive method
accuracy(naive_ibm, ibm_test)## ME RMSE MAE MPE MAPE MASE
## Training set -0.2809365 7.302815 5.09699 -0.08262872 1.115844 1.000000
## Test set -3.7246377 20.248099 17.02899 -1.29391743 4.668186 3.340989
## ACF1 Theil's U
## Training set 0.1351052 NA
## Test set 0.9314689 2.973486
writeLines("\nDrift method")##
## Drift method
accuracy(drift_ibm, ibm_test)## ME RMSE MAE MPE MAPE
## Training set -3.916293e-14 7.297409 5.127996 -0.02530123 1.121650
## Test set 6.108138e+00 17.066963 13.974747 1.41920066 3.707888
## MASE ACF1 Theil's U
## Training set 1.006083 0.1351052 NA
## Test set 2.741765 0.9045875 2.361092
writeLines("\nMean method")##
## Mean method
accuracy(mean_ibm, ibm_test)## ME RMSE MAE MPE MAPE
## Training set 1.660438e-14 73.61532 58.72231 -2.642058 13.03019
## Test set -1.306180e+02 132.12557 130.61797 -35.478819 35.47882
## MASE ACF1 Theil's U
## Training set 11.52098 0.9895779 NA
## Test set 25.62649 0.9314689 19.05515
e_snaive_ibm <- ibm_test - snaive_ibm$mean
e_naive_ibm <- ibm_test - naive_ibm$mean
e_drift_ibm <- ibm_test - drift_ibm$mean
e_mean_ibm <- ibm_test - mean_ibm$mean
autoplot(e_snaive_ibm^2, series = "snaive method") +
autolayer(e_naive_ibm^2, series = "naive method") +
autolayer(e_drift_ibm^2, series = "drift method") +
autolayer(e_mean_ibm^2, series = "mean method") +
guides(colour = guide_legend(title = "Forecast")) +
ggtitle("Errors of the forecast of closing IBM stock price") +
ylab(expression(paste("erro", r^{2})))# Drift method did best
# Time series cross-validation method of tsCV function don't use full data unless h = 1. For example, if usable data has 100 points and h = 3, tsCV predicts 101st point with 98 points, 102nd with 99 points and 103rd with 100 points. Therefore error result value of tsCV cannot help differing from the values of accuracy function. Accuracy function always get result from full data.
ibmclose %>% tsCV(forecastfunction = snaive, h = 69) -> e_snaive_ibm_CV
ibmclose %>% tsCV(forecastfunction = naive, h = 69) -> e_naive_ibm_CV
ibmclose %>% tsCV(forecastfunction = rwf, drift = TRUE, h = 69) -> e_drift_ibm_CV
ibmclose %>% tsCV(forecastfunction = meanf, h = 69) -> e_mean_ibm_CV
autoplot(subset(e_snaive_ibm_CV^2, start = 301), series = "snaive method") +
autolayer(subset(e_naive_ibm_CV^2, start = 301), series = "naive method") +
autolayer(subset(e_drift_ibm_CV^2, start = 301), series = "drift method") +
autolayer(subset(e_mean_ibm_CV^2, start = 301), series = "mean method") +
guides(colour = guide_legend(title = "Forecast")) +
ggtitle("Errors of the forecast of closing IBM stock price",
subtitle = "after using tsCV function") +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)) +
ylab(expression(paste("erro", r^{2})))# Based on the returned results of tsCV function, I would've selected naive method because it yielded smallest error.# d. Check the residuals of your preferred method. Do they resemble white noise?
checkresiduals(naive_ibm)##
## Ljung-Box test
##
## data: Residuals from Naive method
## Q* = 22.555, df = 10, p-value = 0.01251
##
## Model df: 0. Total lags used: 10
checkresiduals(drift_ibm)##
## Ljung-Box test
##
## data: Residuals from Random walk with drift
## Q* = 22.555, df = 9, p-value = 0.007278
##
## Model df: 1. Total lags used: 10
# No. Even when I checked the residuals of naive method because the error values were similar to result of the drift method, they weren't like white noise, too.# a. Produce some plots of the data in order to become familiar with it.
autoplot(hsales) # we call autoplot# b. Split the hsales data set into a training set and a test set, where the test set is the last two years of data.
hsalesTrain <- subset(hsales, end = length(hsales) - 24)
hsalesTest <- subset(hsales, start = length(hsales) - 23) # from the end - 23 months# c. Try using various benchmark methods to forecast the training set and compare the results on the test set. Which method did best?
snaive_hsales <- snaive(hsalesTrain, h = 24)
naive_hsales <- naive(hsalesTrain, h = 24)
drift_hsales <- rwf(hsalesTrain, drift = TRUE, h = 24)
mean_hsales <- meanf(hsalesTrain, h = 24)
autoplot(snaive_hsales) +
autolayer(hsalesTest)autoplot(naive_hsales) +
autolayer(hsalesTest)autoplot(drift_hsales) +
autolayer(hsalesTest)autoplot(mean_hsales) +
autolayer(hsalesTest)writeLines("Snaive method")## Snaive method
accuracy(snaive_hsales, hsalesTest)## ME RMSE MAE MPE MAPE MASE
## Training set 0.1004184 10.582214 8.485356 -2.184269 17.633696 1.0000000
## Test set 1.0416667 5.905506 4.791667 0.972025 8.545729 0.5646984
## ACF1 Theil's U
## Training set 0.8369786 NA
## Test set 0.1687797 0.7091534
writeLines("\nNaive method")##
## Naive method
accuracy(naive_hsales, hsalesTest)## ME RMSE MAE MPE MAPE MASE
## Training set -0.008000 6.301111 5.000000 -0.767457 9.903991 0.5892505
## Test set 2.791667 8.628924 7.208333 2.858639 12.849194 0.8495028
## ACF1 Theil's U
## Training set 0.1824472 NA
## Test set 0.5377994 1.098358
writeLines("\nDrift method")##
## Drift method
accuracy(drift_hsales, hsalesTest)## ME RMSE MAE MPE MAPE MASE
## Training set 3.519316e-13 6.301106 4.999872 -0.7511048 9.903063 0.5892354
## Test set 2.891667e+00 8.658795 7.249000 3.0426108 12.901697 0.8542954
## ACF1 Theil's U
## Training set 0.1824472 NA
## Test set 0.5378711 1.100276
writeLines("\nMean method")##
## Mean method
accuracy(mean_hsales, hsalesTest)## ME RMSE MAE MPE MAPE MASE
## Training set 3.510503e-15 12.162811 9.532738 -6.144876 20.38306 1.1234341
## Test set 3.839475e+00 9.022555 7.561587 4.779122 13.26183 0.8911338
## ACF1 Theil's U
## Training set 0.8661998 NA
## Test set 0.5377994 1.131713
# Seasonal naive method did the best.# d. Check the residuals of your preferred method. Do they resemble white noise?
checkresiduals(snaive_hsales) ##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 682.2, df = 24, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 24
# The residuals don't resemble white noise